# 阶段性复习，按照步骤完成相应网站的音频数据采集
# ------------测试1---------------------
# 待爬取的网站：https://www.ximalaya.com/yinyue/54070700/
# https://www.ximalaya.com/revision/play/v1/audio?id=465295660&ptype=1
# 1、导入包
# 2、向网站服务器发起请求，并打印出状态码
# 3、解析网页的图片数据并打印图片的链接
# 4、获取音频数据下载并适当命名
import requests
from lxml import etree
# Album page whose cover image and track list are scraped.
url = 'https://www.ximalaya.com/yinyue/54070700/'
# Browser-like User-Agent sent with every request.
header = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36"}
# Seconds to wait for a server before giving up; requests has no default
# timeout, so without this a stalled connection would hang the script forever.
timeout = 10

# Step 2: request the album page and print the status code.
response = requests.get(url, headers=header, timeout=timeout)
print(response.status_code)
# An error page contains no track list, so fail loudly instead of silently
# parsing it and downloading nothing.
response.raise_for_status()

# Step 3: parse the page and print the image links.
ehtml = etree.HTML(response.text)
xtitle = ehtml.xpath("//img[@class='img k_Z']//@src")
print(xtitle)

# Step 4: collect the per-track links and download each audio file.
musicurl = ehtml.xpath("//li[@class='Mi_']/div/a//@href")
for href in musicurl:
    # The track id is the last path segment of the link (e.g. "/sound/<id>").
    # Taking the last segment avoids depending on a fixed prefix length.
    musicid = href.rstrip("/").rsplit("/", 1)[-1]
    if not musicid:
        continue

    getmusic = "https://www.ximalaya.com/revision/play/v1/audio?id=" + musicid + "&ptype=1"
    try:
        info = requests.get(getmusic, headers=header, timeout=timeout)
        info.raise_for_status()
        payload = info.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a non-JSON body on older requests versions.
        print("skip", musicid, "- could not fetch track info:", exc)
        continue

    # The audio address may be absent or null; guard every level so one
    # unavailable track does not abort the remaining downloads.
    data = payload.get("data") if isinstance(payload, dict) else None
    audio_src = data.get("src") if isinstance(data, dict) else None
    if not audio_src:
        print("skip", musicid, "- no audio source in response")
        continue

    try:
        musicres = requests.get(audio_src, headers=header, timeout=timeout)
        musicres.raise_for_status()
    except requests.RequestException as exc:
        print("skip", musicid, "- download failed:", exc)
        continue

    # Only open the output file once the download has succeeded, so a failed
    # request never leaves an empty or HTML-filled .mp3 behind.
    saveName = musicid + '.mp3'
    with open(saveName, 'wb') as f:
        f.write(musicres.content)
